library(NMF)
library(rsvd)
library(Rtsne)
library(ggplot2)
library(cowplot)
library(sva)
library(igraph)
library(cccd)
library(KernSmooth)
library(beeswarm)
library(stringr)
library(reshape2)
library(formatR)
library(destiny)
source("Fxns.R")
SalmHelm_UMIs <- load_data("./Extend_data/GSE92332_SalmHelm_UMIcounts.txt.gz")
## [1] "File exists!"
## 2017-12-26 03:09:40 INFO: Data dimensions: 15215x9842
SalmHelm_UMIs <- SalmHelm_UMIs[which(unlist(apply(SalmHelm_UMIs, 1, sum)) >
0), ]
SalmHelm_tpm <- data.frame(log2(1 + tpm(SalmHelm_UMIs)))
## 2017-12-26 03:09:51 INFO: Running TPM normalisation
v = get.variable.genes(SalmHelm_UMIs, min.cv2 = 100)
## 2017-12-26 03:10:34 INFO: Fitting only the 10363 genes with mean expression > 0.0128022759601707
## 2017-12-26 03:10:36 INFO: Found 1936 variable genes (p<0.05)
var.genes = as.character(rownames(v)[v$p.adj < 0.05]) # select genes
SalmHelm.all.cells <- colnames(SalmHelm_UMIs)
SalmHelm.all.genes <- rownames(SalmHelm_UMIs)
SalmHelm.condition <- unlist(lapply(SalmHelm.all.cells, function(x) return(str_split(x,
"_")[[1]][3]))) # cell by conditon
SalmHelm.cell.groups <- unlist(lapply(SalmHelm.all.cells, function(x) return(str_split(x,
"_")[[1]][4]))) # cell groups
SalmHelm.batches <- unlist(lapply(SalmHelm.all.cells, function(x) return(str_split(x,
"_")[[1]][1]))) # batches
# check batch effect
table(SalmHelm.batches)
## SalmHelm.batches
## B1 B10 B2 B3 B4 B5 B6 B7 B8 B9
## 840 950 200 1258 942 1490 631 1169 1542 820
batch_mean_tpm = group.means(counts = SalmHelm_tpm, groups = SalmHelm.batches)
x = batch_mean_tpm[, 1]
y = batch_mean_tpm[, 2]
expr.cor = round(cor(x, y), 2)
smoothScatter(x, y, nrpoints = Inf, pch = 16, cex = 0.25, main = sprintf("Before batch correction, correlation between \ntwo illustrative batches is %s",
expr.cor), xlab = "All genes Batch 2, mean log2(TPM+1)", ylab = "All genes Batch 1, mean log2(TPM+1)")
# remove the batch effect
SalmHelm_tpm_norm = batch.normalise.comBat(counts = as.matrix(SalmHelm_tpm),
batch.groups = SalmHelm.batches)
## Standardizing Data across genes
batch_mean_tpm_norm = group.means(counts = SalmHelm_tpm_norm, groups = SalmHelm.batches)
x = batch_mean_tpm_norm[, 1]
y = batch_mean_tpm_norm[, 2]
expr.cor = round(cor(x, y), 2)
smoothScatter(x, y, nrpoints = Inf, pch = 16, cex = 0.25, main = sprintf("After batch correction, correlation between \ntwo illustrative batches is %s",
expr.cor), xlab = "All genes Batch 2, mean log2(TPM+1)", ylab = "All genes Batch 1, mean log2(TPM+1)")
SalmHelm.tsne.rot <- PCA_TSNE.scores(data.tpm = SalmHelm_tpm_norm, data.umis = SalmHelm_UMIs,
var_genes = var.genes, data_name = "./Extend_data/SalmHelm", sig.pcs = TRUE,
is.var.genes = TRUE)
## ./Extend_data/SalmHelm_pca_scores.txt exists
## ./Extend_data/SalmHelm_tsne_scores.txt exists
SalmHelm.tsne.rot <- data.frame(SalmHelm.tsne.rot)
colnames(SalmHelm.tsne.rot) <- c("tSNE_1", "tSNE_2")
ggplot(data = SalmHelm.tsne.rot, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(colour = SalmHelm.cell.groups)) +
scale_color_manual(values = brewer16) + scale_fill_discrete() + theme(legend.title = element_blank()) +
ggtitle("SalmHelm")
ggplot(data = SalmHelm.tsne.rot, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(colour = SalmHelm.condition)) +
scale_color_manual(values = brewer16) + scale_fill_discrete() + theme(legend.title = element_blank()) +
ggtitle("SalmHelm")
genes <- c("Cd74", "Gpx2", "Hsph1")
All.Facet.tsne <- data.frame()
for (gene in genes) {
All.Facet.tsne <- rbind(All.Facet.tsne, Facet_wrap_fun(gene = gene, tpm.data = SalmHelm_tpm_norm,
tsne.data = SalmHelm.tsne.rot, condition = unique(SalmHelm.condition),
all.condition = SalmHelm.condition))
}
## There ara 4 conditions
## Whether creat data accurate 1
## There ara 4 conditions
## Whether creat data accurate 1
## There ara 4 conditions
## Whether creat data accurate 1
ggplot(data = All.Facet.tsne[All.Facet.tsne$Gene %in% genes[1], ], aes(x = tSNE_1,
y = tSNE_2, colour = Gene.Mp)) + geom_point() + ggtitle(genes[1]) + facet_wrap(~Condition,
nrow = 5, ncol = 2) + theme(legend.title = element_text(size = 10, color = "blue",
face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue",
mid = "green", high = "red", name = "Log2\nTPM+1")
ggplot(data = All.Facet.tsne[All.Facet.tsne$Gene %in% genes[2], ], aes(x = tSNE_1,
y = tSNE_2, colour = Gene.Mp)) + geom_point() + ggtitle(genes[2]) + facet_wrap(~Condition,
nrow = 5, ncol = 2) + theme(legend.title = element_text(size = 10, color = "blue",
face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue",
mid = "green", high = "red", name = "Log2\nTPM+1")
ggplot(data = All.Facet.tsne[All.Facet.tsne$Gene %in% genes[3], ], aes(x = tSNE_1,
y = tSNE_2, colour = Gene.Mp)) + geom_point() + ggtitle(genes[3]) + facet_wrap(~Condition,
nrow = 5, ncol = 2) + theme(legend.title = element_text(size = 10, color = "blue",
face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue",
mid = "green", high = "red", name = "Log2\nTPM+1")
SalmHelm.Tuft.tpm <- SalmHelm_tpm_norm[, SalmHelm.cell.groups %in% "Tuft"]
SalmHelm.Tuft.tsne <- SalmHelm.tsne.rot[SalmHelm.cell.groups %in% "Tuft", ]
SalmHelm.Tuft.pca <- read.table("./Extend_data/SalmHelm_pca_scores.txt")[SalmHelm.cell.groups %in%
"Tuft", ]
### Find the k to make 3 cluster k-nearest method
### Find_K<-function(K,pca.data,n=3){ dm<-as.matrix(dist(pca.data)) for(k in
### K){ knn<-build_knn_graph(dm,k=k) clustering<-cluster_graph(knn)$partition
### if(length(unique(clustering))==n){ cat(sprintf('Find the K:%d\n',k))
### return(k) break } } } K<-seq(2,200,by = 2) k<-Find_K(K = K,pca.data =
### SalmHelm.Tuft.pca[,1:15])
dm <- as.matrix(dist(SalmHelm.Tuft.pca[, 1:15]))
# build nearest neighbor graph
knn = build_knn_graph(dm, k = 74)
Tuft.clustering = cluster_graph(knn)$partition
Tuft_1_marker_genes <- as.character(read.table("./Extend_data/Tuft_1_marker_genes.txt")$V1)
Tuft_2_marker_genes <- as.character(read.table("./Extend_data/Tuft_2_marker_genes.txt")$V1)
Tuft.clustering <- paste("Tuft-", Tuft.clustering, sep = "")
Tuft.clustering <- ifelse(Tuft.clustering == "Tuft-1", "Tuft-1", ifelse(Tuft.clustering ==
"Tuft-2", "Tuft-2", "Progenitor"))
score_1 <- unlist(apply(SalmHelm.Tuft.tpm[Tuft_1_marker_genes, ], 2, mean, na.rm = TRUE))
score_2 <- unlist(apply(SalmHelm.Tuft.tpm[Tuft_2_marker_genes, ], 2, mean, na.rm = TRUE))
Dklk1 <- as.numeric(SalmHelm.Tuft.tpm["Dclk1", ])
Violin.data1 <- cbind(data.frame(Value = score_1), data.frame(Group = Tuft.clustering))
Violin.data2 <- cbind(data.frame(Value = score_2), data.frame(Group = Tuft.clustering))
Dklk1.data <- cbind(data.frame(Value = Dklk1), data.frame(Group = Tuft.clustering))
ggplot(data = Violin.data1, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) +
geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2),
color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Tuft-1 score") +
xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")
# stat_summary(fun.data=mean_sdl,geom='pointrange', color='red')
ggplot(data = Violin.data2, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) +
geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2),
color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Tuft-2 score") +
xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")
ggplot(data = Dklk1.data, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) +
geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2),
color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Dckl1") +
xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")
## Figure c.1
ggplot(SalmHelm.Tuft.tsne, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(color = score_1)) +
theme(legend.title = element_text(size = 8, color = "blue", face = "bold"),
legend.position = "right") + ggtitle("Tuft-1 Score") + scale_color_gradient2(low = "lightblue",
mid = "green", high = "red", name = "Log2\nTPM+1")
## Figure c.2
ggplot(SalmHelm.Tuft.tsne, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(color = score_2)) +
theme(legend.title = element_text(size = 8, color = "blue", face = "bold"),
legend.position = "right") + ggtitle("Tuft-2 Score") + scale_color_gradient2(low = "lightblue",
mid = "green", high = "red", name = "Log2\nTPM+1")
## Figure c.3
ggplot(data = SalmHelm.Tuft.tsne, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(colour = Tuft.clustering)) +
scale_color_manual(values = brewer16) + scale_fill_discrete() + theme(legend.title = element_blank()) +
ggtitle("Tuft")
Induction of antiparasitic genes by goblet cells after helminth infection
SalmHelm.Goblet.tpm <- SalmHelm_tpm_norm[, SalmHelm.cell.groups %in% "Goblet"]
SalmHelm.Goblet.condiion <- SalmHelm.condition[SalmHelm.cell.groups %in% "Goblet"]
SalmHelm.Goblet.condition.tpm <- SalmHelm.Goblet.tpm[, SalmHelm.Goblet.condiion %in%
c("Control", "Hpoly.Day3", "Hpoly.Day10")]
SalmHelm.Goblet.condiion3 <- SalmHelm.Goblet.condiion[SalmHelm.Goblet.condiion %in%
c("Control", "Hpoly.Day3", "Hpoly.Day10")]
Retnlb <- as.numeric(SalmHelm.Goblet.condition.tpm["Retnlb", ])
Wars <- as.numeric(SalmHelm.Goblet.condition.tpm["Wars", ])
Pnliprp2 <- as.numeric(SalmHelm.Goblet.condition.tpm["Pnliprp2", ])
Violin.Retnlb <- cbind(data.frame(Value = Retnlb), data.frame(Group = SalmHelm.Goblet.condiion3))
Violin.Wars <- cbind(data.frame(Value = Wars), data.frame(Group = SalmHelm.Goblet.condiion3))
Violin.Pnliprp2 <- cbind(data.frame(Value = Pnliprp2), data.frame(Group = SalmHelm.Goblet.condiion3))
## Figure d.1
ggplot(data = Violin.Retnlb, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) +
geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2),
color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Retnlb") +
xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")
## Figure d.2
ggplot(data = Violin.Wars, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) +
geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2),
color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Wars") +
xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")
## Figure d.3
ggplot(data = Violin.Pnliprp2, aes(x = Group, y = Value, colour = Group)) +
geom_violin(aes(fill = Group)) + geom_boxplot(width = 0.1, fill = "white") +
geom_jitter(shape = 16, position = position_jitter(0.2), color = "black") +
theme(legend.title = element_blank()) + ggtitle(label = "Pnliprp2") + xlab(NULL) +
ylab("Expression Level\nLog2(TPM+1)")